library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(googlesheets4)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(ggridges)
Data! Thanks Collier
Setting up each country to have a color
CountryColors = tibble(
Country = c("USA","France","Italy","Israel","Spain","Chile" )
,color = c("rgba(255,255,255,1)","rgba(255,0,255,1)","rgba(0,255,255,1)"
,"rgba(255,255,0,1)","rgba(255,0,0,1)","rgba(0,0,255,1)")
)
Cleaning the data:
# head(WineData)
# Remove empty rows at the bottom
WineData = WineData[is.na(WineData$Name)==FALSE,]
# Add average to the no vintage wine
MeanVintage = mean(na.omit(as.numeric(WineData$Vintage)))
## Warning in na.omit(as.numeric(WineData$Vintage)): NAs introduced by coercion
## Warning in na.omit(as.numeric(WineData$Vintage)): NAs introduced by coercion
# clean data: split country out, add vintage as all numeric
WineData = WineData %>%
mutate(
CountryClean = str_split(Country, ", ") # split region and country into list
,Vintage = as.vector(Vintage) #clean vintage from blended vector (dbl & chr)
,VintageClean = as.numeric(Vintage) # set data to numeric to call out NAs
) %>%
unnest_wider(CountryClean) %>% # split region and country list into two columns
rename("Region" = ...1
,"CountryClean" = ...2) %>% #rename columns from unnest
rowwise() %>%
mutate(
VintageClean = ifelse(is.na(VintageClean) == TRUE, MeanVintage, Vintage) #set unknown vintage to average
,CountryColor = CountryColors$color[match(CountryClean,CountryColors$Country)] #add color for plotting
,Size = (log(`Total Price`+1, exp(.1))+1)/2
)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
## New names:
## * `` -> ...1
## * `` -> ...2
Let’s take a look at the data after getting it cleaned
head(WineData, 20)
## # A tibble: 20 x 16
## # Rowwise:
## Name Vintage Type Country Price `Bottles remain~ `Grower/bottler`
## <chr> <list> <chr> <chr> <dbl> <dbl> <chr>
## 1 Opus One <dbl [1~ Red Californ~ 372 5 Opus One
## 2 Opus One <dbl [1~ Red Californ~ 139 6 Opus One
## 3 Overture <chr [1~ Red Californ~ 139 3 Opus One
## 4 Opus One <dbl [1~ Red Californ~ 479 2 Opus One
## 5 Opus One <dbl [1~ Red Californ~ 480 1 Opus One
## 6 Silver O~ <dbl [1~ Cabern~ Napa Val~ 81 1 Silver Oak
## 7 Silver O~ <dbl [1~ Cabern~ Napa Val~ 148 2 Silver Oak
## 8 Chateau ~ <dbl [1~ Bordea~ Pessac-L~ 965 1 Chateau La Missi~
## 9 Omina <dbl [1~ Cabern~ Lazio, I~ 173 1 Omina Romana
## 10 Omina <dbl [1~ Viogni~ Lazio, I~ 0 2 Omina Romana
## 11 Gianni B~ <dbl [1~ Brunel~ Montalci~ 35 2 Gianni Brunelli
## 12 Gianni B~ <dbl [1~ Brunel~ Montalci~ 118 3 Gianni Brunelli
## 13 Gianni B~ <dbl [1~ Brunel~ Montalci~ 118 3 Gianni Brunelli
## 14 Quintess~ <dbl [1~ Bordea~ Napa Val~ 200 2 Quintessa
## 15 Titus - ~ <dbl [1~ Cabern~ Napa Val~ 90 9 Titus
## 16 Titus <dbl [1~ Merlot Napa Val~ 52 1 Titus
## 17 Diamond ~ <dbl [1~ Cabern~ Napa Val~ 230 1 Diamond Creek
## 18 Dakota S~ <dbl [1~ Cabern~ Napa Val~ 160 23 Dakota Shy
## 19 Peju <dbl [1~ Zinfan~ Napa Val~ 49.5 1 Peju
## 20 Peju - T~ <dbl [1~ Red Bl~ Napa Val~ 77 1 Peju
## # ... with 9 more variables: Where accquired <lgl>, Bottle size <lgl>,
## # Total Price <dbl>, Where <chr>, Region <chr>, CountryClean <chr>,
## # VintageClean <dbl>, CountryColor <chr>, Size <dbl>
Bubble Chart!
Still have a few bits to edit - Size is off - Colors are off
plot_ly(
data = WineData
,x = ~VintageClean
,y = ~Price
,name = ~CountryClean
,color = ~CountryColor
,text = ~paste0("Brand: ", `Grower/bottler`
,"</br></br>Name: ", Name
,"</br>Country: ", CountryClean
,"</br>Vintage: ", Vintage
,"</br>Price: ", Price
,"</br>Bottles: ", `Bottles remaining`
,"</br>Total Value: ", `Total Price`
)
,hoverinfo = 'text'
,type = "scatter"
,mode = "markers"
,marker = list(
size = ~Size
,opacity = .7
)
)
Rdigeline histogram chart!
Fun way to compare the bottle prices by country
WineData %>%
mutate(text = fct_reorder(CountryClean,Price)) %>%
ggplot(
aes(y = text, x = Price, fill = text)
) +
geom_density_ridges(alpha = .6, stat = "binline", bins = 20) +
ggridges::theme_ridges() +
theme(
legend.position="none",
panel.spacing = unit(6, "lines"),
strip.text.x = element_text(size = 8)
) +
xlab("Price ($)") +
ylab("")